R packages.

library(fda)
library(ggplot2)

pm10 <- read.csv("MPM1017.csv")[, -1]
no2 <- read.csv("MNO217.csv")[, -1]
# no2 <- no2[, -c(1, 3)]

plot1 <-
    reshape2::melt(pm10) %>%
    dplyr::select(Station = variable,
                  PM10 = value) %>%
    mutate(X = rep(seq(nrow(pm10)),
                    ncol(pm10))) %>%
    ggplot(aes_string(x = "X",
                      y = "PM10",
                      color = "Station")) +
        geom_line() +
        theme_light() +
        scale_color_viridis_d()
## No id variables; using all as measure variables
plot2 <-
    reshape2::melt(no2) %>%
    dplyr::select(Station = variable,
                  NO2 = value) %>%
    mutate(X = rep(seq(nrow(no2)),
                    ncol(no2))) %>%
    ggplot(aes_string(x = "X",
                      y = "NO2",
                      color = "Station")) +
        geom_line() +
        theme_light() +
        scale_color_viridis_d()
## No id variables; using all as measure variables
plotly::ggplotly(plot1)
plotly::ggplotly(plot2)

Create basis

npm10 <- nrow(pm10)
nno2 <- nrow(no2)

base_bspline <- create.bspline.basis(c(1, npm10), 15)
base_fu <- create.fourier.basis(c(1, nno2), 15)


class(base_bspline)
## [1] "basisfd"
class(base_fu)
## [1] "basisfd"
methods(class = "basisfd")
## [1] *       ==      norder  plot    predict summary
## see '?methods' for accessing help and source code
methods(class = "basisfd")
## [1] *       ==      norder  plot    predict summary
## see '?methods' for accessing help and source code

Regression Spline

Define a Functional Parameter Object

param_lambda <- fdPar(base_bspline, lambda = 0)
fd_smooth <- smooth.basis(argvals = seq(1, npm1o),
                          y = as.matrix(pm10),
                          fdParobj = param_lambda)
methods(class = "fdPar")
## [1] boxplot      coef         coefficients plot         predict     
## [6] summary     
## see '?methods' for accessing help and source code

Smooth basis

fd_smooth <- smooth.basis(argvals = seq(1, npm1o),
                          y = as.matrix(pm10),
                          fdParobj = param_lambda)
methods(class = "fdPar")
## [1] boxplot      coef         coefficients plot         predict     
## [6] summary     
## see '?methods' for accessing help and source code

Spline smoothing

Define a Functional Parameter Object

base_bspline <- create.bspline.basis(c(1, npm10), 15)
param_lambda <- fdPar(base_bspline,
                      2,
                      lambda = 0.1)
methods(class = "fdPar")
## [1] boxplot      coef         coefficients plot         predict     
## [6] summary     
## see '?methods' for accessing help and source code

Smooth basis

fd_smooth <- smooth.basis(argvals = seq(1, npm1o),
                          y = as.matrix(pm10),
                          fdParobj = param_lambda)
methods(class = "fdSmooth")
## [1] as.fd        boxplot      coef         coefficients fitted      
## [6] lines        plot         predict      residuals   
## see '?methods' for accessing help and source code

Lambda Selection.

gcv <- NULL
lambda_list <- seq(-2, 4, length.out = 30)
matrix_data <- as.matrix(pm10)

for (lambda in lambda_list) {
    lambda <- 10**lambda
    param_lambda <- fdPar(base_bspline,
                      2,
                      lambda)
    fd_smooth <- smooth.basis(argvals = seq(1,
                                            nrow(matrix_data)),
                              y = matrix_data,
                              fdParobj = param_lambda)
    gcv <- c(gcv, sum(fd_smooth$gcv))
}

plotgcv <-
  data.frame(log_lambda = lambda_list,
             gcv = gcv) %>%
        ggplot(aes(x = log_lambda, y = gcv)) +
        geom_line(color = "purple", linetype = "dashed") +
        theme_light() +
        scale_x_continuous(limits = c(0, 4))
plotly::ggplotly(plotgcv)
selected_lamdba <- 10**lambda_list[which.min(gcv)]
print(selected_lamdba)
## [1] 573.6153
print(log(selected_lamdba, 10))
## [1] 2.758621
param_lambda <- fdPar(base_bspline,
                      2,
                      lambda = selected_lamdba)

fd_smooth <- smooth.basis(argvals = seq(1,
                                        nrow(matrix_data)),
                          y = matrix_data,
                          fdParobj = param_lambda)

Mean and covariance

summary(fd_smooth)
##         Length Class  Mode   
## fd         3   fd     list   
## df         1   -none- numeric
## gcv       15   -none- numeric
## beta       0   -none- NULL   
## SSE        1   -none- numeric
## penmat   225   -none- numeric
## y2cMap  4515   -none- numeric
## argvals  301   -none- numeric
## y       4515   -none- numeric
summary(fd_smooth$fd)
## Functional data object:
## 
##  Dimensions of the data:
##     time 
##      reps 
##      values 
## 
## Basis object:
## 
##   Type:   bspline 
## 
##   Range:  1  to  301 
## 
##   Number of basis functions:  15 
## 
##   Order of spline:  4 
## [1] "  Interior knots"
##  [1]  26  51  76 101 126 151 176 201 226 251 276
## 
## Coefficient matrix:
##                AJM      ATI      CAM      CHO      HGM      IZT      MER
## bspl4.1  34.110501 31.83064 50.20089 70.37051 39.08153 46.98081 48.34281
## bspl4.2  32.477066 50.98302 73.67716 54.98442 54.73972 34.26232 68.63172
## bspl4.3  49.143241 61.41435 71.62944 93.33580 66.47449 46.84670 83.83257
## bspl4.4  48.240887 54.91478 66.26488 49.29242 39.09159 41.95314 60.22371
## bspl4.5  36.784271 52.46350 51.01799 54.29874 50.12077 33.54044 44.38670
## bspl4.6  59.387184 60.82071 92.89793 74.44615 61.28273 53.34583 87.00475
## bspl4.7  27.305529 25.19857 41.28708 51.23261 31.57539 30.85612 50.72208
## bspl4.8  31.299420 41.68267 49.76532 66.30478 36.75911 30.50820 50.16534
## bspl4.9  32.796806 35.38682 33.98313 41.03040 28.41548 27.18365 38.96741
## bspl4.10 32.218024 32.11503 51.00969 29.57862 40.85753 31.32665 53.96163
## bspl4.11 34.189246 35.81296 35.78263 67.72373 26.42651 29.74819 37.76812
## bspl4.12  4.960169 19.64149 25.80655 14.28972 23.97862 25.31323 31.93691
## bspl4.13 55.665066 53.06209 62.38848 49.59352 40.80872 34.99670 59.29485
## bspl4.14 21.586488 24.83213 38.32911 67.65709 26.12782 29.63947 33.50390
## bspl4.15 11.311397 18.72002 48.88571 31.16112 38.53841 28.15478 53.17658
##               MPA       SAG      SFE      TAH      TLA      TLI      VIF
## bspl4.1  51.17289  72.45661 39.09742 66.06110 46.68763 44.31135 62.97789
## bspl4.2  41.84901  48.31982 31.46336 20.95821 65.66030 78.37847 91.99489
## bspl4.3  58.39260 129.71638 51.41534 75.70928 68.30748 47.21604 54.91774
## bspl4.4  54.61263  67.70804 44.39371 62.12911 57.93407 77.52482 84.87685
## bspl4.5  44.67746  52.30892 41.32273 36.16466 61.72442 52.55434 64.63194
## bspl4.6  52.43237  88.65200 60.51195 70.21679 80.12129 70.34230 71.85168
## bspl4.7  30.66939  44.00495 25.87816 38.15850 31.67644 37.27066 38.17742
## bspl4.8  43.40051  50.28220 31.91501 47.66555 50.61078 41.35442 48.41801
## bspl4.9  31.35076  40.09806 30.48199 30.87800 34.96751 43.94444 42.59748
## bspl4.10 31.98917  40.07873 34.94306 37.01859 48.11265 23.84916 34.69820
## bspl4.11 48.72591  39.56231 30.06635 34.98404 38.78770 58.67257 59.12485
## bspl4.12 10.93356  40.09187 11.55308 42.44577 25.28165 23.66874 34.93287
## bspl4.13 50.63609  39.75644 61.16406 33.40438 68.87154 43.97740 31.80344
## bspl4.14 26.86598  49.54702 27.39930 47.14853 51.07971 37.14566 33.66057
## bspl4.15 10.16847  52.15154 22.33607 22.39165 45.08720 34.48901 49.86364
##                XAL
## bspl4.1   57.58899
## bspl4.2  104.95805
## bspl4.3   86.77519
## bspl4.4   85.78762
## bspl4.5   60.26297
## bspl4.6  121.60980
## bspl4.7   48.09534
## bspl4.8   50.26583
## bspl4.9   54.19862
## bspl4.10  40.21468
## bspl4.11  51.42980
## bspl4.12  45.11898
## bspl4.13  54.03768
## bspl4.14  41.01061
## bspl4.15  91.97008
class(fd_smooth$fd)
## [1] "fd"
methods(class = "fd")
##  [1] -            [            *            ^            +           
##  [6] boxplot      coef         coefficients density      deriv       
## [11] fRegress     lines        mean         norder       plot        
## [16] predict      sqrt         sum          summary     
## see '?methods' for accessing help and source code
mean_function <- mean.fd(fd_smooth$fd)
plot(mean_function)

## [1] "done"
variance_fda <- function(variance_surface,
                         nda,
                         npunt,
                         x_lab = "X",
                         y_lab = "Y",
                         z_lab = "Z") {

    eva <- seq(1, nda, length = npunt)
    super_eval <- eval.bifd(eva, eva, variance_surface);
    persp(eva, eva, super_eval,
        theta = -45, phi = 25, r = 3, expand = 0.5,
        ticktype = "detailed",
        xlab = x_lab,
        ylab = y_lab,
        zlab = z_lab)
}


var_fd <- var.fd(fd_smooth$fd)
variance_fda(var.fd(fd_smooth$fd), npm1o, 200)

FPCA

pcapm10 <- pca.fd(fd_smooth$fd,
                nharm = 2,
                harmfdPar = fdPar(fd_smooth$fd))

str(pcapm10)
## List of 5
##  $ harmonics:List of 3
##   ..$ coefs  : num [1:15, 1:2] 0.049 0.1122 0.0917 0.0688 0.0415 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:15] "bspl4.1" "bspl4.2" "bspl4.3" "bspl4.4" ...
##   .. .. ..$ : chr [1:2] "PC1" "PC2"
##   ..$ basis  :List of 10
##   .. ..$ call       : language basisfd(type = type, rangeval = rangeval, nbasis = nbasis, params = params,      dropind = dropind, quadvals = qu| __truncated__
##   .. ..$ type       : chr "bspline"
##   .. ..$ rangeval   : num [1:2] 1 301
##   .. ..$ nbasis     : num 15
##   .. ..$ params     : num [1:11] 26 51 76 101 126 151 176 201 226 251 ...
##   .. ..$ dropind    : NULL
##   .. ..$ quadvals   : NULL
##   .. ..$ values     : list()
##   .. ..$ basisvalues: list()
##   .. ..$ names      : chr [1:15] "bspl4.1" "bspl4.2" "bspl4.3" "bspl4.4" ...
##   .. ..- attr(*, "class")= chr "basisfd"
##   ..$ fdnames:List of 3
##   .. ..$ : chr [1:15] "bspl4.1" "bspl4.2" "bspl4.3" "bspl4.4" ...
##   .. ..$ : chr [1:2] "PC1" "PC2"
##   .. ..$ : chr "values"
##   ..- attr(*, "class")= chr "fd"
##  $ values   : num [1:15] 25004 1765 1395 1017 539 ...
##  $ scores   : num [1:15, 1:2] -214 -105 100 107 -116 ...
##  $ varprop  : num [1:2] 0.813 0.0574
##  $ meanfd   :List of 3
##   ..$ coefs  : num [1:15, 1] 50.8 56.9 69.7 59.7 49.1 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr "mean"
##   ..$ basis  :List of 10
##   .. ..$ call       : language basisfd(type = type, rangeval = rangeval, nbasis = nbasis, params = params,      dropind = dropind, quadvals = qu| __truncated__
##   .. ..$ type       : chr "bspline"
##   .. ..$ rangeval   : num [1:2] 1 301
##   .. ..$ nbasis     : num 15
##   .. ..$ params     : num [1:11] 26 51 76 101 126 151 176 201 226 251 ...
##   .. ..$ dropind    : NULL
##   .. ..$ quadvals   : NULL
##   .. ..$ values     : list()
##   .. ..$ basisvalues: list()
##   .. ..$ names      : chr [1:15] "bspl4.1" "bspl4.2" "bspl4.3" "bspl4.4" ...
##   .. ..- attr(*, "class")= chr "basisfd"
##   ..$ fdnames:List of 3
##   .. ..$ time  : int [1:301] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..$ reps  : chr "mean"
##   .. ..$ values: chr "mean value"
##   ..- attr(*, "class")= chr "fd"
##  - attr(*, "class")= chr "pca.fd"
class(pcapm10)
## [1] "pca.fd"
methods(class = "pca.fd")
## [1] plot
## see '?methods' for accessing help and source code
plot(pcapm10)

print(pcapm10$scores)
##             [,1]        [,2]
##  [1,] -213.76428   12.064608
##  [2,] -104.94160  -21.122787
##  [3,]  100.45805   41.592457
##  [4,]  107.48669   24.076751
##  [5,] -116.31566    8.395449
##  [6,] -222.05214    5.067746
##  [7,]  107.77446   53.016242
##  [8,] -133.86990  -36.497651
##  [9,]  185.99390   -3.479708
## [10,] -193.41326   33.565043
## [11,]  -48.29056    2.572591
## [12,]   60.84631   43.467741
## [13,]   18.78715  -63.602167
## [14,]  115.87097 -110.137182
## [15,]  335.42988   11.020866
print(pcapm10$params)
## NULL
plot(pcapm10$scores[, 1], pcapm10$scores[, 2])

str(pcapm10$varprop)
##  num [1:2] 0.813 0.0574

Functional BOX_PLOT

ll <- boxplot.fd(fd_smooth$fd,  method = "MBD")

ll$depth
##       AJM       ATI       CAM       CHO       HGM       IZT       MER 
## 0.3075908 0.5194719 0.4793022 0.4221594 0.4780764 0.2380009 0.4279114 
##       MPA       SAG       SFE       TAH       TLA       TLI       VIF 
## 0.4585573 0.4244224 0.3338048 0.5537954 0.5008015 0.5489863 0.4310231 
##       XAL 
## 0.2094295
ll$medcurve
## TAH 
##  11
cov.fun = function(d,k,c,mu){
    k*exp(-c*d^mu)
}
n=50
p=30
t=seq(0,1,len=p)
d=dist(t,upper=TRUE,diag=TRUE)
d.matrix=as.matrix(d)                  # 
#covariance function in time
t.cov = cov.fun(d.matrix,1,1,1)
# Cholesky Decomposition
L=chol(t.cov)
mu=4*t
e=matrix(rnorm(n*p),p,n)
ydata = mu+t(L)%*%e

#functional boxplot
fbplot(ydata, method='MBD', ylim=c(-11,15))

## $depth
##  [1] 0.22753741 0.39063946 0.17937415 0.34802721 0.47711565 0.40315646
##  [7] 0.40821769 0.07385034 0.44032653 0.40642177 0.47918367 0.34682993
## [13] 0.47308844 0.39961905 0.37921088 0.40723810 0.24440816 0.21921088
## [19] 0.37148299 0.42634014 0.46628571 0.21142857 0.30302041 0.33703401
## [25] 0.42029932 0.29034014 0.37948299 0.42835374 0.46013605 0.37365986
## [31] 0.16125170 0.48146939 0.48832653 0.28065306 0.32195918 0.43635374
## [37] 0.47542857 0.38862585 0.22840816 0.23140136 0.39662585 0.48146939
## [43] 0.47200000 0.38563265 0.21648980 0.24549660 0.36832653 0.44598639
## [49] 0.34911565 0.37365986
## 
## $outpoint
## [1] 8
## 
## $medcurve
## [1] 33
##
## Model 2 with outliers
##
#magnitude
k=6
#randomly introduce outliers
C=rbinom(n,1,0.1)
s=2*rbinom(n,1,0.5)-1
cs.m=matrix(C*s,p,n,byrow=TRUE)
e=matrix(rnorm(n*p),p,n)
y=mu+t(L)%*%e+k*cs.m
#functional boxplot
fbplot(y,method='MBD',ylim=c(-11,15))

## $depth
##  [1] 0.05420408 0.47559184 0.43771429 0.40604082 0.46759184 0.40429932
##  [7] 0.47025850 0.41170068 0.45855782 0.40228571 0.36936054 0.47749660
## [13] 0.47210884 0.07776871 0.38062585 0.31597279 0.44718367 0.31902041
## [19] 0.15221769 0.46438095 0.44440816 0.47842177 0.20952381 0.49774150
## [25] 0.07765986 0.48364626 0.12870748 0.47765986 0.45942857 0.42672109
## [31] 0.10405442 0.04391837 0.34688435 0.47531973 0.38993197 0.27096599
## [37] 0.31727891 0.45518367 0.43787755 0.41714286 0.17289796 0.28925170
## [43] 0.43874830 0.29317007 0.20435374 0.50454422 0.39151020 0.44740136
## [49] 0.44919728 0.40206803
## 
## $outpoint
## [1]  1 14 19 25 27 31 32 41
## 
## $medcurve
## [1] 46